//package com.zxl.grocery.webmagic.processor;
//
//
//import com.zxl.grocery.common.persistence.model.CrawlerNews;
//import com.zxl.grocery.core.util.Constant;
//import com.zxl.grocery.core.webmagic.pipeline.NewsPipeline;
//import org.apache.commons.lang3.StringUtils;
//import us.codecraft.webmagic.Page;
//import us.codecraft.webmagic.Site;
//import us.codecraft.webmagic.Spider;
//import us.codecraft.webmagic.processor.PageProcessor;
//import us.codecraft.webmagic.selector.Selectable;
//
//import java.util.List;
//
///**
// * info: Jianshu home page crawler.
// * <p>
// * NOTE: this whole class (including its package and import lines) is currently
// * commented out, so none of it is compiled.
// * <p>
// * When the page URL matches {@link #list}, each article list item is turned into a
// * {@code CrawlerNews} and stored on the page as a result field.
// * Created by Lzx on 2017/7/12.
// */
//public class JianShuProcessor implements PageProcessor {
//
//    // Crawl settings: domain "jianshu.com", a sleep time of 100 and a desktop
//    // Chrome user-agent string. NOTE(review): the sleep time is presumably in
//    // milliseconds - confirm against the WebMagic Site documentation.
//    private Site site = Site.me()
//            .setDomain("jianshu.com")
//            .setSleepTime(100)
//            .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36");
//    // NOTE(review): the lone ';' below is a stray empty declaration left after the field above.
//    ;
//
//    // URL pattern that process() uses to decide whether a page should be parsed.
//    // It is passed to regex(), so the '.' characters are regex metacharacters
//    // rather than literal dots.
//    public static final String list = "http://www.jianshu.com";
//
//    /**
//     * Extracts news entries from a downloaded page.
//     * <p>
//     * Nothing is extracted unless the page URL matches {@link #list}. For a matching
//     * page, every {@code li} under the {@code ul} with class
//     * {@code "article-list thumbnails"} is inspected; items with a non-empty title and
//     * link are stored via {@code page.putField} under the key {@code "news" + title}.
//     *
//     * @param page the page handed to this processor
//     */
//    @Override
//    public void process(Page page) {
//        if (page.getUrl().regex(list).match()) {
//            // NOTE(review): this local variable shadows the static field 'list' from here on.
//            List<Selectable> list=page.getHtml().xpath("//ul[@class='article-list thumbnails']/li").nodes();
//            for (Selectable s : list) {
//                // Title text comes from the anchor inside div/h4; the link is taken from the same h4.
//                String title=s.xpath("//div/h4/a/text()").toString();
//                String link=s.xpath("//div/h4").links().toString();
//                // Skip list items where either the title or the link is missing.
//                if (StringUtils.isNotEmpty(title) && StringUtils.isNotEmpty(link)) {
//                    CrawlerNews news=new CrawlerNews();
//                    news.setTitle(title);
//                    // The title is reused as the info text; no separate summary is extracted.
//                    news.setInfo(title);
//                    news.setLink(link);
//                    news.setTypeId(Constant.Type_JianShu);
//                    news.setSourcesId(Constant.Sources_JianShu);
//                    // The field key includes the title, so two items with the same title share one key.
//                    page.putField("news"+title, news);
//                }
//            }
//        }
//    }
//
//    /**
//     * Returns the crawl settings held in the {@code site} field.
//     *
//     * @return the {@code Site} configured for this processor
//     */
//    @Override
//    public Site getSite() {
//        return site;
//    }
//
//    /**
//     * Manual entry point: creates a spider for this processor, seeds it with
//     * "http://www.jianshu.com", attaches a {@code NewsPipeline}, sets 5 threads and
//     * exit-when-complete, then starts it.
//     *
//     * @param args unused
//     */
//    public static void main(String[] args) {
//        Spider spider= Spider.create(new JianShuProcessor());
//        spider.addUrl("http://www.jianshu.com");
//        spider.addPipeline(new NewsPipeline());
//        spider.thread(5);
//        spider.setExitWhenComplete(true);
//        spider.start();
//    }
//}
